program define make_bpea_data

* Open a dated log file (spaces in the system date are replaced by dashes)

global mydate = subinstr("${S_DATE}"," ","-",.)
capture log close
quietly log using logfiles/make_bpea_data_$mydate, replace

*=====================================================================
*
* Makes dataset for BPEA project.  Starting point is the
* "wide" files in extract/raw_w_datasets/ and extract/pro_w_datasets
*
* Steps:  merge the wide files, reshape to a long person-year panel,
*         build derived variables (region and age dummies, balance-sheet
*         and debt-service ratios), merge in state-level data, and save
*         the result as bpea_data.
*
* Program run by run_bpea.do
*
* Written by Karen Dynan in January 2012
*
*=====================================================================

* Run routines we'll need (defined in separate files; dropped first so that
* the current versions are always the ones loaded)

  capture program drop merge_in
  run merge_in
  capture program drop prev_yr_dat
  run prev_yr_dat

* Define the lists of variables to pull in
*   prolist:  variables merged in with the "pro" argument to merge_in
*   rawlist:  variables merged in with the "raw" argument to merge_in

  local prolist wgt tot_dbt mort_bal_amt veh_bal_amt oth_dbt networth hsval state
  local prolist `prolist' cons_aa consxh_aa hlths hhs hcoll hage homeown dupl adds_alts
  local prolist `prolist' mort_yes_1 mort_yes_2 veh_eqty tot_fin nocars
  local prolist `prolist' mort_pymt mort_veh_pymt tot_dbt_pymt reth mort_refid
  local prolist `prolist' veh_loan_yes oth_dbt_yes moved_yes

  local rawlist headid mort_beh_yes_1 mort_fc_yes_1 mort_mod_1 mort_dist_prob_1
  local rawlist `rawlist' mort_beh_yes_2 mort_fc_yes_2 mort_mod_2 mort_dist_prob_2
  local rawlist `rawlist' fc_st_yes fc_losthm

* Pull in data, one variable at a time, starting from the wide my_w file

  use ../extract/pro_w_datasets/my_w

  foreach vvv in cross my `prolist' {
    disp "`vvv'"
    merge_in `vvv' pro
  }
 
  foreach vvv in `rawlist' {
    disp "`vvv'"
    merge_in `vvv' raw
  }

* income needs to be brought forward since it is off by a year

  prev_yr_dat my myprev 2009

* For testing the code
* drop if persid > 20000

  quietly compress

* Reshape the dataset into long form

  #delimit ;
  quietly reshape long myprev `prolist' `rawlist', 
          i(persid cross) j(yr 67-99 00 01 02 03 04 05 06 07 08 09 10 11);
  #delimit cr

* Recode the yr variable to resolve the Y2K problem:  two-digit years 67-99
* become 1967-1999 and 00-11 become 2000-2011.

  quietly replace yr = yr + 1900
  quietly recode yr (1900=2000) (1901=2001) (1902=2002) (1903=2003) (1904=2004) (1905=2005) ///
          (1906=2006) (1907=2007) (1908=2008) (1909=2009) (1910=2010) (1911=2011)
  sort persid yr

* make it a true panel in Stata

  format yr %ty
  sort persid yr
  tsset persid yr

* remove duplicates and the records for which there are no data at all (new sample
* additions before they entered the sample, and households after they left it)

  drop if dupl == 1 | dupl==.

* test the panel thing
*
* comment 1/21/12:  seems to work; most of the changes are 1 year with a few 0 and 2 years.
*                   note, though, that there are aberrations --- for example for persid == 7177
*                   head age goes from 52 to 23 between 1993 and 1994 even though the headid
*                   is the same.  I think this is just a mistake.  There are 400-500 of these
*                   guys in the panel as a whole (about 0.2-0.3 percent of the sample).

  gen d_hage = hage - l1.hage
  summ d_hage if d_hage~=. & abs(d_hage) > 3, detail
  drop d_hage

* generate some additional variables

* region dummies built from the state code (missing when state is missing)

  gen region1 = inlist(state,6,18,20,28,38,44) if state~=.
  gen region2 = inlist(state,29,31,37) if state~=.
  gen region3 = inlist(state,13,12,21,34,48) if state~=.
  gen region4 = inlist(state,14,15,22,24,26,33,40) if state~=.
  gen region5 = inlist(state,7,8,9,10,19,32,39,45,47) if state~=.
  gen region6 = inlist(state,1,16,23,41) if state~=.
  gen region7 = inlist(state,3,17,35,42) if state~=.
  gen region8 = inlist(state,2,5,11,30,25,43,27,49) if state~=.
  gen region9 = inlist(state,50,4,51,36,46) if state~=. 

* age-of-head dummies (missing when hage is missing).  The top bracket is
* hage >= 65 so that a head aged exactly 65 is not left out of every category.

  gen agecat1 = (hage < 25) if hage~=.
  gen agecat2 = (hage >=25 & hage < 35) if hage~=.
  gen agecat3 = (hage >=35 & hage < 45) if hage~=.
  gen agecat4 = (hage >=45 & hage < 55) if hage~=.
  gen agecat5 = (hage >=55 & hage < 65) if hage~=.
  gen agecat6 = (hage >=65) if hage~=.

  gen renter = (homeown==0) if homeown~=.

* balance-sheet variables
*   vehval is floored at 0 and assets is floored at 1
*   NOTE(review):  d_to_a divides by the unfloored sum rather than by assets, so
*                  assets is not used anywhere in this program --- confirm whether
*                  d_to_a was meant to be tot_dbt/assets.

  gen cons_dbt = veh_bal_amt + oth_dbt
  gen vehval = max(veh_eqty + veh_bal_amt, 0) if veh_eqty~=. & veh_bal_amt~=.
  gen assets = max(hsval + vehval + tot_fin, 1) if hsval~=. & vehval~=. & tot_fin~=.
  gen d_to_a = tot_dbt/(hsval + vehval + tot_fin)
  replace d_to_a = 0 if tot_dbt==0

  gen mort_to_hv = mort_bal_amt/hsval
  replace mort_to_hv = 0 if homeown==0
 
* ratios to income (myprev).  mort_bal_amt is written out in full so that the
* line does not depend on variable-name abbreviation.

  gen d_to_y        = tot_dbt/myprev
  gen mort_to_y     = mort_bal_amt/myprev
  gen consdbt_to_y  = cons_dbt/myprev
  gen adds_alts_to_y = adds_alts/myprev
  gen mvp_to_y      = mort_veh_pymt/myprev
  gen consxh_to_y   = consxh_aa / myprev
  gen networth_to_y = networth / myprev
  gen ds_to_y       = tot_dbt_pymt/myprev
  gen mds_to_y      = (12*mort_pymt)/myprev

* this measure of the debt service burden uses income lagged two years in the
* denominator, which lets us avoid the fact that measurement error will tend to
* bias del(xxx/y) for people with high xxx/y.

  gen ds_to_l2y     = tot_dbt_pymt/l2.myprev

* merge in state data (old-style merge syntax, matching on state and yr;
* presumably ../Misc/state_data is already sorted on state yr --- confirm)

  sort state yr
  merge state yr using ../Misc/state_data

* clean up

  quietly compress
  sort yr persid

  save bpea_data, replace

capture log close

end
	
